library(tidyverse)
library(dplyr, warn.conflicts = FALSE)
library(here)
library(plotly)
library(scales)
library(highcharter)
library(stringr)
# Apply the black-and-white ggplot2 theme to every plot drawn below.
theme_set(theme_bw())

# Load the Jackson do Pandeiro track table. Columns that are not listed in
# `cols()` are parsed as doubles; the listed ones get an explicit type.
data <- read_csv(
  here::here("data/jackson.csv"),
  col_types = cols(
    .default = col_double(),
    # Text columns.
    album_uri = col_character(),
    album_name = col_character(),
    album_img = col_character(),
    album_release_date = col_character(),
    track_name = col_character(),
    track_uri = col_character(),
    key = col_character(),
    mode = col_character(),
    key_mode = col_character(),
    # Date column (default date format).
    album_release_year = col_date(format = ""),
    # Integer columns.
    album_popularity = col_integer(),
    time_signature = col_integer(),
    track_popularity = col_integer()
  )
) %>%
  # Any album name containing "1954" is replaced by one short label.
  mutate(
    album_name = gsub(
      ".*(1954).*",
      "The Music of Brazil/Jackson do Pandeiro",
      album_name
    )
  )

# Print a compact overview of the column types and first values.
glimpse(data)
Observations: 500
Variables: 23
$ album_uri <chr> "5T9tTjPIfjbUJGRJdYOOLl", "5T9tTjPIfjbUJGRJdYOOLl", "5T9tTjPIfjb...
$ album_name <chr> "Jackson Do Pandeiro Volume 1: Tum, Tum, Tum!", "Jackson Do Pand...
$ album_img <chr> "https://i.scdn.co/image/5dcc4a0cad740f1ee0774196d0a14f3693ef887...
$ album_release_date <chr> "1958-11-11", "1958-11-11", "1958-11-11", "1958-11-11", "1958-11...
$ album_release_year <date> 1958-11-11, 1958-11-11, 1958-11-11, 1958-11-11, 1958-11-11, 195...
$ album_popularity <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...
$ track_name <chr> "Tum, Tum, Tum", "Pacífico Pacato", "Nortista Quatrocentão", "Se...
$ track_uri <chr> "6cCYhV6fU68uzbjWPG9V7x", "6Gu7y9SgtVTGh8YGhDPtCe", "1hq7M7cJtvD...
$ danceability <dbl> 0.501, 0.663, 0.550, 0.447, 0.544, 0.571, 0.495, 0.572, 0.500, 0...
$ energy <dbl> 0.987, 0.962, 0.947, 0.969, 0.972, 0.926, 0.967, 0.986, 0.947, 0...
$ key <chr> "A", "F", "D", "G", "E", "F", "E", "C", "F", "A#", "E", "F", "D#...
$ loudness <dbl> 2.561, 1.137, 1.621, 2.743, 2.513, 2.414, 2.375, 2.597, 3.078, 3...
$ mode <chr> "major", "major", "major", "major", "minor", "major", "minor", "...
$ speechiness <dbl> 0.0429, 0.1810, 0.0469, 0.0549, 0.0502, 0.0344, 0.0576, 0.0367, ...
$ acousticness <dbl> 0.718, 0.738, 0.666, 0.759, 0.787, 0.651, 0.712, 0.194, 0.286, 0...
$ instrumentalness <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...
$ liveness <dbl> 0.282, 0.200, 0.251, 0.333, 0.176, 0.342, 0.321, 0.301, 0.323, 0...
$ valence <dbl> 0.963, 0.961, 0.923, 0.899, 0.783, 0.961, 0.755, 0.989, 0.957, 0...
$ tempo <dbl> 101.676, 113.562, 116.125, 116.023, 112.863, 133.065, 117.822, 1...
$ duration_ms <dbl> 158133, 139773, 163173, 143733, 151653, 157480, 158133, 154680, ...
$ time_signature <int> 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4...
$ key_mode <chr> "A major", "F major", "D major", "G major", "E minor", "F major"...
$ track_popularity <int> 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0...
# Q-Q plot of the danceability values.
ggplot(data, aes(sample = danceability)) +
  stat_qq()

# Interactive highcharter chart of the danceability values.
hchart(data$danceability, color = "#B71C1C", name = "Dançabilidade")

# Q-Q plot of the speechiness values.
ggplot(data, aes(sample = speechiness)) +
  stat_qq()

# Interactive highcharter chart of the speechiness values.
hchart(data$speechiness, color = "#B71C1C", name = "Verbosidade")
# Add the track duration in seconds, derived from the millisecond column.
data <- mutate(data, duration_s = duration_ms / 1000)

# Inspect only the newly created column.
glimpse(select(data, duration_s))
Observations: 500
Variables: 1
$ duration_s <dbl> 158.133, 139.773, 163.173, 143.733, 151.653, 157.480, 158.133, 154.680, ...
# Q-Q plot of the track durations in seconds.
ggplot(data, aes(sample = duration_s)) +
  stat_qq()

# Interactive highcharter chart of the track durations in seconds.
hchart(data$duration_s, color = "#B71C1C", name = "Duração (s)")
Último álbum: “1981: Isso é que é Forró!”
# Flag rows whose album was released after the cutoff date as remasters.
# `album_release_date` is read as a character column holding ISO
# "YYYY-MM-DD" text, so `>` compares strings lexicographically and the cutoff
# literal must use the same layout. The previous literal "1981-30-12" had the
# day and month swapped.
data <- data %>%
  mutate(remaster = album_release_date > "1981-12-30")

# Spot-check the new flag on a random sample of ten rows.
data %>%
  select(album_name, album_release_year, remaster) %>%
  sample_n(10)
# Keep one row per album and turn the logical remaster flag into a label.
temp <- data %>%
  distinct(album_name, .keep_all = TRUE) %>%
  mutate(remaster = ifelse(remaster, "remasterizado", "original"))

# Interactive highcharter chart of the per-album labels, one colour per point.
hchart(temp$remaster, colorByPoint = TRUE, name = "Álbum")
# Scatter plot of speechiness against danceability, using one row per distinct
# track name; points are semi-transparent.
p <- ggplot(
  distinct(data, track_name, .keep_all = TRUE),
  aes(x = speechiness, y = danceability)
) +
  geom_point(alpha = 0.4)

# Convert the static ggplot into an interactive plotly widget.
ggplotly(p)
A dançabilidade das músicas de Jackson
# 2D density of speechiness against danceability (one row per distinct track
# name), drawn as polygons filled by density level.
ggplot(
  distinct(data, track_name, .keep_all = TRUE),
  aes(x = speechiness, y = danceability)
) +
  stat_density2d(aes(fill = ..level..), geom = "polygon")
# List the ten tracks with the highest speechiness, highest first.
# Grouping by `track_name` made `top_n()` rank inside each single-track group,
# which returned practically every row. De-duplicate track names first (as the
# track-level plots in this file do) and rank across the whole table instead.
# Ties at the tenth position are all kept by `top_n()`.
data %>%
  distinct(track_name, .keep_all = TRUE) %>%
  top_n(10, speechiness) %>%
  arrange(desc(speechiness))
# Per release date value, count the albums that are originals and the albums
# that are remasters (one row per distinct album name).
temp <- data %>%
  distinct(album_name, .keep_all = TRUE) %>%
  group_by(album_release_year) %>%
  summarise(
    original_n = sum(!remaster),
    remaster_n = sum(remaster)
  )

# Column chart with one series for each of the two counts.
highchart() %>%
  hc_xAxis(categories = temp$album_release_year) %>%
  hc_add_series(
    temp$original_n,
    type = "column",
    color = "#B71C1C",
    name = "Não remasterizado"
  ) %>%
  hc_add_series(
    temp$remaster_n,
    type = "column",
    name = "Remasterizado"
  ) %>%
  hc_title(text = "Número de álbuns por ano")
# Plotly margins: left and bottom.
m <- list(l = 70, b = 150)

# Box plots of track duration (seconds) for each album release date value,
# coloured by remaster flag. The x labels are rotated 30 degrees.
p <- data %>%
  ggplot(aes(
    x = as.factor(album_release_year),
    y = duration_s,
    group = album_release_year,
    color = remaster
  )) +
  geom_boxplot(position = "dodge", alpha = 0.6) +
  theme(axis.text.x = element_text(angle = 30, hjust = 1))

# `FALSE` is spelled out because `F` is an ordinary, reassignable variable.
ggplotly(p) %>%
  layout(autosize = FALSE, margin = m)
# Lollipop chart of album popularity: one horizontal segment from 0 to the
# popularity value plus a point at its end, with albums ordered by popularity
# and coloured by remaster flag.

# Plotly left margin.
m <- list(l = 370)

p <- data %>%
  ggplot(aes(
    x = album_popularity,
    y = reorder(album_name, album_popularity),
    color = remaster,
    group = remaster
  )) +
  geom_segment(aes(
    x = 0,
    y = reorder(album_name, album_popularity),
    xend = album_popularity,
    yend = album_name
  )) +
  geom_point() +
  theme(axis.title.y = element_blank())

# `FALSE` is spelled out because `F` is an ordinary, reassignable variable.
# NOTE(review): `tooltip = NA` is kept as written; presumably it is meant to
# suppress the hover tooltips - confirm against the plotly documentation.
ggplotly(p, tooltip = NA) %>%
  layout(autosize = FALSE, margin = m)
# tim12equal = c("#00008F", "#0000EA", "#0047FF", "#00A2FF", "#00FEFF", "#5AFFA5", "#B5FF4A", "#FFED00", "#FF9200", "#FF3700", "#DB0000", "#800000")
#
# m <- list(
# b = 70);
#
# p <-
# data %>%
# select(key,album_release_date) %>%
# group_by(album_release_date,key) %>%
# summarise(count = n()) %>%
# mutate(prop = count/sum(count)) %>%
# ungroup() %>%
# ggplot(aes(x = factor(album_release_date), y = count, fill = key)) +
# geom_bar(stat = "identity", position = "fill", width = .7) +
# scale_y_continuous(labels = scales::percent) +
# scale_fill_manual(values = tim12equal) +
# theme(axis.text.x = element_text(angle = 30, hjust = 1)) +
# ggtitle("Distribuição de notas musicais temporalmente")
#
#
# ggplotly(p) %>%
# layout(autosize = F,
# margin = m)
# Twelve-colour palette, one colour per value of `key`.
tim12equal <- c(
  "#00008F", "#0000EA", "#0047FF", "#00A2FF", "#00FEFF", "#5AFFA5",
  "#B5FF4A", "#FFED00", "#FF9200", "#FF3700", "#DB0000", "#800000"
)

# Plotly margins: left and right.
m <- list(l = 80, r = 30)

# Share of each musical key per album release date, with one facet row per
# remaster flag value. `position = "fill"` normalises the counts to
# proportions, so no proportion column is computed by hand. The former `prop`
# column was never plotted and was calculated over the wrong grouping.
p <- data %>%
  select(key, album_release_date, remaster) %>%
  group_by(album_release_date, key, remaster) %>%
  summarise(count = n()) %>%
  ungroup() %>%
  ggplot(aes(x = factor(album_release_date), y = count, fill = key)) +
  geom_bar(stat = "identity", position = "fill", width = .7) +
  scale_y_continuous(labels = scales::percent) +
  scale_fill_manual(values = tim12equal) +
  theme(axis.text.x = element_text(angle = 30, hjust = 1)) +
  theme(
    axis.title.x = element_blank(),
    axis.title.y = element_blank()
  ) +
  facet_grid(remaster ~ .) +
  ggtitle("Notas musicais (distinção por remasterização)") +
  guides(fill = guide_legend(title = ""))

# Horizontal legend placed below the plot area. `FALSE` is spelled out because
# `F` is an ordinary, reassignable variable.
ggplotly(p) %>%
  layout(
    autosize = FALSE,
    margin = m,
    legend = list(orientation = "h", y = -0.2)
  )
É possível ver que, no início dos anos 1960, as músicas na escala entre E e G dominam fortemente o repertório de Jackson, curiosamente quando as músicas de notas entre A e C chegam a ocupar metade das ocorrências. Isso está de acordo com o perfil de Jackson, notório por sua heterogeneidade.